def main():
    """Print the distance between the two documents named on the command line.

    Expects exactly two file names in ``sys.argv[1:]``. With any other
    argument count a usage message is printed and nothing else happens.
    """
    if len(sys.argv) != 3:
        print("Usage: docdist1.py filename_1 filename_2")
        return

    filename_1 = sys.argv[1]
    filename_2 = sys.argv[2]
    sorted_word_list_1 = word_frequencies_for_file(filename_1)
    sorted_word_list_2 = word_frequencies_for_file(filename_2)
    distance = vector_angle(sorted_word_list_1, sorted_word_list_2)
    # A single parenthesised argument prints identically whether ``print``
    # is a statement or a function.
    print("The distance between the documents is: %0.6f (radians)" % distance)
def word_frequencies_for_file(filename):
    """Return the word-frequency mapping for the file named *filename*.

    The file is read with ``read_file`` (defined outside this section;
    presumably it returns the file's lines), split into words, and the
    words are counted with ``count_frequency``.
    """
    line_list = read_file(filename)
    word_list = get_words_from_line_list(line_list)
    freq_mapping = count_frequency(word_list)
    return freq_mapping
def get_words_from_line_list(L):
    """Return one flat list of the words found in the lines of *L*.

    Each line is split with ``get_words_from_string`` and the results are
    joined in line order.
    """
    word_list = []
    for line in L:
        words_in_line = get_words_from_string(line)
        # Concatenation builds a new list each time, copying everything
        # accumulated so far.
        word_list = word_list + words_in_line
    return word_list

def get_words_from_string(line):
    """Return the lower-cased words contained in *line*.

    A word is a maximal run of alphanumeric characters; every other
    character acts as a separator and is discarded.
    """
    word_list = []
    character_list = []

    def flush():
        # Emit the word collected so far, if any.
        if character_list:
            word_list.append("".join(character_list).lower())
            del character_list[:]

    for c in line:
        if c.isalnum():
            character_list.append(c)
        else:
            flush()
    # The line may end in the middle of a word.
    flush()
    return word_list
def count_frequency(word_list):
    """Return a list of ``[word, count]`` pairs for the words in *word_list*.

    Pairs appear in order of each word's first occurrence.
    """
    L = []
    for new_word in word_list:
        for entry in L:
            if new_word == entry[0]:
                entry[1] = entry[1] + 1
                break
        else:
            # The inner loop finished without a break: first sighting.
            L.append([new_word, 1])
    return L


def vector_angle(L1, L2):
    """Return the angle, in radians, between two word-frequency vectors.

    The angle is ``acos(L1.L2 / sqrt((L1.L1) * (L2.L2)))`` where ``.`` is
    ``inner_product``.

    Raises:
        ZeroDivisionError: if either vector has a zero inner product with
            itself (for example, a document with no words).
    """
    numerator = inner_product(L1, L2)
    denominator = math.sqrt(inner_product(L1, L1) * inner_product(L2, L2))
    # Floating-point rounding can leave the ratio marginally outside
    # [-1, 1], which math.acos rejects with a domain error.
    cosine = max(-1.0, min(1.0, numerator / denominator))
    return math.acos(cosine)


def inner_product(L1, L2):
    """Return the inner product of two lists of ``(word, count)`` pairs.

    For every pair of entries that share the same word, the product of
    their counts is added to the result. The result is a float.
    """
    total = 0.0
    for word1, count1 in L1:
        for word2, count2 in L2:
            if word1 == word2:
                total += count1 * count2
    return total


# docdist2


1 

2 
3 

1 

2 

3 

4 

5 

6 


if __name__ == "__main__":
    # Run main() under the profiler when the file is executed as a script.
    import cProfile
    cProfile.run("main()")


def get_words_from_line_list(L):
    """Return one flat list of the words found in the lines of *L*.

    Each line is split with ``get_words_from_string``; the words are
    appended in place, so the accumulated list is never copied.
    """
    word_list = []
    for line in L:
        words_in_line = get_words_from_string(line)
        word_list.extend(words_in_line)
    return word_list
def word_frequencies_for_file(filename):
    """Return the sorted word-frequency list for the file named *filename*.

    The file is read with ``read_file``, split into words, counted with
    ``count_frequency`` and then passed to ``insertion_sort``.
    ``read_file`` and ``insertion_sort`` are defined outside this section;
    ``insertion_sort`` presumably sorts its argument in place, since its
    return value is not used.
    """
    line_list = read_file(filename)
    word_list = get_words_from_line_list(line_list)
    freq_mapping = count_frequency(word_list)
    insertion_sort(freq_mapping)
    return freq_mapping
def count_frequency(word_list):
    """Return a list of ``(word, count)`` pairs for the words in *word_list*.

    Counting goes through a dictionary, so each word costs one lookup
    instead of a scan over all distinct words seen so far.
    """
    D = {}
    for new_word in word_list:
        if new_word in D:
            D[new_word] = D[new_word] + 1
        else:
            D[new_word] = 1
    # list(...) guarantees a real list even where items() returns a view,
    # so the result can be indexed, sliced and sorted.
    return list(D.items())
# Translation table that maps every punctuation character to a space and
# every ASCII uppercase letter to its lowercase form. string.maketrans only
# exists on Python 2; str.maketrans is the Python 3 equivalent.
_maketrans = getattr(string, "maketrans", None) or str.maketrans
translation_table = _maketrans(
    string.punctuation + string.ascii_uppercase,
    " " * len(string.punctuation) + string.ascii_lowercase,
)

def get_words_from_string(line):
    """Return the words contained in *line*.

    The line is passed through ``translation_table`` and then split on
    whitespace.
    """
    line = line.translate(translation_table)
    word_list = line.split()
    return word_list
def word_frequencies_for_file(filename):
    """Return the sorted word-frequency list for the file named *filename*.

    The file is read with ``read_file`` (defined outside this section),
    split into words, counted with ``count_frequency`` and sorted with
    ``merge_sort``, whose return value is the result.
    """
    line_list = read_file(filename)
    word_list = get_words_from_line_list(line_list)
    freq_mapping = count_frequency(word_list)
    freq_mapping = merge_sort(freq_mapping)
    return freq_mapping
def merge_sort(A):
    """Return the elements of *A* in ascending order.

    *A* is split in half, each half is sorted recursively, and the halves
    are combined with ``merge``. A list of length 0 or 1 is returned as-is
    (the same object, not a copy).
    """
    n = len(A)
    # n == 0 must stop here too: an empty list would otherwise split into
    # two empty lists and recurse without end.
    if n <= 1:
        return A
    mid = n // 2
    L = merge_sort(A[:mid])
    R = merge_sort(A[mid:])
    return merge(L, R)

def merge(L, R):
    """Return a new sorted list containing every element of *L* and *R*.

    Both inputs must already be sorted in ascending order. When the two
    front elements compare equal, the one from *R* is taken first.
    """
    i = 0
    j = 0
    answer = []
    while i < len(L) and j < len(R):
        if L[i] < R[j]:
            answer.append(L[i])
            i += 1
        else:
            answer.append(R[j])
            j += 1
    # At most one of these slices is non-empty.
    answer.extend(L[i:])
    answer.extend(R[j:])
    return answer
def count_frequency(word_list):
    """Return a dictionary mapping each word in *word_list* to its count."""
    D = {}
    for new_word in word_list:
        if new_word in D:
            D[new_word] = D[new_word] + 1
        else:
            D[new_word] = 1
    return D
def word_frequencies_for_file(filename):
    """Return the word-frequency mapping for the file named *filename*.

    The file is read with ``read_file`` (defined outside this section),
    split into words, and counted with ``count_frequency``; no sorting
    step is applied.
    """
    line_list = read_file(filename)
    word_list = get_words_from_line_list(line_list)
    freq_mapping = count_frequency(word_list)
    return freq_mapping
def inner_product(D1, D2):
    """Return the inner product of two word -> count dictionaries.

    Only words present in both dictionaries contribute. The result is a
    float.
    """
    total = 0.0
    for key in D1:
        if key in D2:
            total += D1[key] * D2[key]
    return total
def get_words_from_text(text):
    """Return the words contained in *text*.

    The whole text is passed through ``translation_table`` in one call and
    then split on whitespace.
    """
    text = text.translate(translation_table)
    word_list = text.split()
    return word_list

def word_frequencies_for_file(filename):
    """Return the word-frequency mapping for the file named *filename*.

    The value returned by ``read_file`` (defined outside this section;
    presumably the file's full text) is split into words in a single pass
    and counted with ``count_frequency``.
    """
    text = read_file(filename)
    word_list = get_words_from_text(text)
    freq_mapping = count_frequency(word_list)
    return freq_mapping
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